packages <- c("tidyverse","cowplot","ggtree","ggnewscale","ape","phytools","phyloAMR","gridExtra")
#Load Packages
invisible(lapply(packages,library,character.only=T,quietly=T))
#Scripts
source("./lib/consistent_themes.R")
source("./lib/common_functions.R")
source("./scripts/GWAS/analysis/GWAS_analysis_scripts.R")
# Print
Sys.info()
sysname
"Darwin"
release
"24.4.0"
version
"Darwin Kernel Version 24.4.0: Wed Mar 19 21:16:31 PDT 2025; root:xnu-11417.101.15~1/RELEASE_X86_64"
nodename
"msla0572.local"
machine
"x86_64"
login
"root"
user
"kgontjes"
effective_user
"kgontjes"
sessionInfo()
R version 4.5.0 (2025-04-11)
Platform: x86_64-apple-darwin20
Running under: macOS Sequoia 15.4
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: America/Detroit
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] kableExtra_1.4.0 gridExtra_2.3 phyloAMR_0.1.0 phytools_2.4-4
[5] maps_3.4.2.1 ape_5.8-1 ggnewscale_0.5.1 ggtree_3.16.0
[9] cowplot_1.1.3 lubridate_1.9.4 forcats_1.0.0 stringr_1.5.1
[13] dplyr_1.1.4 purrr_1.0.4 readr_2.1.5 tidyr_1.3.1
[17] tibble_3.2.1 ggplot2_3.5.2 tidyverse_2.0.0 knitr_1.50
loaded via a namespace (and not attached):
[1] tidyselect_1.2.1 viridisLite_0.4.2 farver_2.1.2
[4] optimParallel_1.0-2 fastmap_1.2.0 lazyeval_0.2.2
[7] combinat_0.0-8 digest_0.6.37 timechange_0.3.0
[10] lifecycle_1.0.4 tidytree_0.4.6 magrittr_2.0.3
[13] compiler_4.5.0 rlang_1.1.6 sass_0.4.10
[16] tools_4.5.0 igraph_2.1.4 yaml_2.3.10
[19] phangorn_2.12.1 clusterGeneration_1.3.8 mnormt_2.1.1
[22] scatterplot3d_0.3-44 xml2_1.3.8 RColorBrewer_1.1-3
[25] aplot_0.2.5 expm_1.0-0 withr_3.0.2
[28] numDeriv_2016.8-1.1 grid_4.5.0 colorspace_2.1-1
[31] scales_1.4.0 iterators_1.0.14 MASS_7.3-65
[34] cli_3.6.5 rmarkdown_2.29 treeio_1.32.0
[37] generics_0.1.3 rstudioapi_0.17.1 tzdb_0.5.0
[40] cachem_1.1.0 parallel_4.5.0 ggplotify_0.1.2
[43] yulab.utils_0.2.0 vctrs_0.6.5 Matrix_1.7-3
[46] jsonlite_2.0.0 gridGraphics_0.5-1 hms_1.1.3
[49] patchwork_1.3.0 hues_0.2.0 systemfonts_1.2.2
[52] foreach_1.5.2 jquerylib_0.1.4 glue_1.8.0
[55] codetools_0.2-20 DEoptim_2.2-8 stringi_1.8.7
[58] gtable_0.3.6 quadprog_1.5-8 pillar_1.10.2
[61] htmltools_0.5.8.1 R6_2.6.1 doParallel_1.0.17
[64] evaluate_1.0.3 lattice_0.22-7 ggfun_0.1.8
[67] bslib_0.9.0 Rcpp_1.0.14 fastmatch_1.1-6
[70] svglite_2.1.3 coda_0.19-4.1 nlme_3.1-168
[73] xfun_0.52 fs_1.6.6 pkgconfig_2.0.3
df <- readRDS("./data/dataset/df.RDS")
pclustering <- readRDS("./data/asr_clustering/blbli_asr_clustering_df.RDS")
tr <- read.tree("./data/tree/tree.treefile")
gwas_table <- readRDS("./data/GWAS/hits/gwas_hits_table.RDS") %>% subset(nn_qc=="Yes" & locus_tag != '')
gwas_hits <- readRDS("./data/GWAS/hits/gwas_mat.RDS")
nn <- readRDS("./data/nearest_neighbor_comparisons/nn_comparisons.RDS")
df <- left_join(df,gwas_hits %>% mutate(isolate_no = rownames(.))) %>% left_join(.,pclustering)
figure_3.A_tb <- gwas_table %>% select(locus_tag,gene,product,resistance_category,sig_tests,sig_models_simple,sig_phenotypes)
figure_3.A_tb$locus_tag <- recode(figure_3.A_tb$locus_tag,"AA018"="AA018 KPC plasmid cluster","AA552"="AA552 KPC plasmid cluster","OmpK36_loop3_insertion"="Loop 3 insertion in ompK36","OmpK36GD"="GD loop 3 insertion in ompK36",'OmpK36_c25t' = 'C25T SNP in ompK36','OmpK36_truncation_kleborate'='Truncation of ompK36')
figure_3.A_tb$gene <- ifelse(grepl(pattern = 'ompK36',figure_3.A_tb$locus_tag) | figure_3.A_tb$locus_tag =='KPNIH1_RS18665',"ompK36",figure_3.A_tb$gene) %>% {ifelse(is.na(.),"",.)}
figure_3.A_tb$product <- ifelse(figure_3.A_tb$locus_tag == "KPNIH1_RS18100","mdtA/muxA family multidrug efflux transporter periplasmic adaptor subunit",figure_3.A_tb$product)
figure_3.A_tb$product <- ifelse(figure_3.A_tb$gene=='ompK36','porin OmpC',figure_3.A_tb$product)
figure_3.A_tb$product <- ifelse(grepl(pattern = 'KPC',figure_3.A_tb$locus_tag),"blaKPC-associated plasmid",figure_3.A_tb$product)
figure_3.A_tbl <- figure_3.A_tb %>% select(locus_tag,gene,product,resistance_category,sig_tests,sig_models_simple,sig_phenotypes) %>% `colnames<-`(c('Significant Hit','Gene Hit', 'Product','Genotype Category', 'Significant Tests','Significant Models', 'Significant Phenotypes'))
figure_3.A <- figure_3.A_tbl %>% tableGrob(rows = NULL,theme=mytheme_GWAS)
rownames(df) <- df$isolate_no
figure_3.df_mat <- get_gwas_hit_matrix(gwas_table,df)
sig_hits_name <- recode(colnames(figure_3.df_mat),"AA018"="AA018 KPC plasmid cluster","AA552"="AA552 KPC plasmid cluster","OmpK36_loop3_insertion"="Loop 3 insertion in ompK36","OmpK36GD"="GD loop 3 insertion in ompK36",'OmpK36_c25t' = 'C25T SNP in ompK36','OmpK36_truncation_kleborate'='Truncation of ompK36')
figure_3.phylo <- gwas_figures(df,tr,figure_3.df_mat,sig_hits_name = sig_hits_name %>% gsub("_1|_2|_1.2|_2.2|_3|_3.2","",.)) + ylim(NA,550)
figure_3.FB <- plot_grid(figure_3.phylo,labels = "B",label_size=62)
figure_3.FB
gwas_nons_freq <- variants_freq(gwas_table$genotype,df)
gwas_nons_freq$locus_tag <- recode(gwas_nons_freq$locus_tag,"AA018"="AA018 KPC plasmid cluster","AA552"="AA552 KPC plasmid cluster","OmpK36_loop3_insertion"="Loop 3 insertion in ompK36","OmpK36GD"="GD loop 3 insertion in ompK36",'OmpK36_c25t' = 'C25T SNP in ompK36','OmpK36_truncation_kleborate'='Truncation of ompK36')
gwas_nons_freq$`Significant Hit` <- gwas_nons_freq$locus_tag
gwas_nons_freq$`Significant Hit` <- factor(gwas_nons_freq$`Significant Hit`, levels=rev(unique(figure_3.A_tb$locus_tag)))
figure_3.C <- freq_plot(gwas_nons_freq)
# D. FC MIC (MVB * IR overlay)
nn_data_melt <- lapply(gwas_table$genotype,generate_nn_melt_data,nn_data=nn) %>% do.call(rbind,.)
nn_data_melt$locus_tag <- recode(nn_data_melt$locus_tag,"AA018"="AA018 KPC plasmid cluster","AA552"="AA552 KPC plasmid cluster","OmpK36_loop3_insertion"="Loop 3 insertion in ompK36","OmpK36GD"="GD loop 3 insertion in ompK36",'OmpK36_c25t' = 'C25T SNP in ompK36','OmpK36_truncation_kleborate'='Truncation of ompK36')
nn_data_melt$locus_tag <- factor(nn_data_melt$locus_tag, levels=rev(unique(figure_3.A_tb$locus_tag)))
figure_3.D <- nn_plot(nn_data_melt)
figure_3.FCD <- plot_grid(figure_3.C,figure_3.D ,nrow = 2,labels = c("C","D"),label_size = 62,align = "hv",label_x = -0.02) + theme(plot.margin = unit(c(t=0,r=0,b=0,l=0.1), "cm"))
figure_3.FCDB <- plot_grid(figure_3.FB,figure_3.FCD,ncol = 2,rel_heights = c(1,1),rel_widths = c(1,0.625)) + theme(plot.margin = unit(c(-40,0,0,0), "cm"))
figure_3.FA <- plot_grid(figure_3.A,labels = "A.",label_size=62,align = "hv") + theme(plot.margin = unit(c(-40,0,0,0), "cm"))
figure_3 <- plot_grid(figure_3.FA,figure_3.FCDB,labels = c("A",""),nrow=2,label_size = 62) + theme(plot.margin = unit(c(0,0,0.25,0), "cm"))
figure_3